1 Foreword

This vignette is based on a different approach : instead of focusing on genes features, we are here interested on probes itselves, more particulary on their variability.

2 Variability plot

We introduce here a variability plot taking as input the list of targeted genes (eg : superup deregulated genes), the data for both tumoral and healthy tissues (splitted), the probes platform, the window used for indexing probes (it does use get_features) and any options the user wants to pass in the plot function.

variability_plot<- function(targeted_genes = penda_superup_deregulated,
                            meth_tumoral_data = meth_tumoral,
                            meth_healthy_data = meth_normal,
                            meth_platform = meth_lusc$platform,
                            window = c(2500,2500),
                            ...)
{
  
  
  ## Data manipulation
  
  features <- get_features(targeted_genes, study = trscr_lusc, up_str = window[2], dwn_str = window[1])
  meth_tumoral_no_sex_chr <- meth_tumoral_data[intersect(rownames(meth_platform[which(meth_platform[,1] != "chrX"
                                                                                      & meth_platform[,1] != "chrY"),]),
                                                         rownames(meth_tumoral_data)),]
  meth_healthy_no_sex_chr <- meth_healthy_data[intersect(rownames(meth_platform[which(meth_platform[,1] != "chrX"
                                                                                      & meth_platform[,1] != "chrY"),]),
                                                         rownames(meth_healthy_data)),]
  
  healthy_of_interest <- meth_healthy_no_sex_chr[intersect(unique(unlist(feat_indexed_probes)),rownames(meth_healthy_no_sex_chr)),]
  tumoral_of_interest <- meth_tumoral_no_sex_chr[intersect(unique(unlist(feat_indexed_probes)),rownames(meth_tumoral_no_sex_chr)),]
  
  ## Get values
  
  sd_tumoral <-apply(tumoral_of_interest,1,sd,na.rm=T)
  mean_tumoral <- apply(tumoral_of_interest,1,mean,na.rm=T)
  rsd_tumoral <- apply(tumoral_of_interest,1,rsd,na.rm=T)
  
  
  sd_healthy<- apply(healthy_of_interest,1,sd,na.rm=T)
  mean_healthy<-apply(healthy_of_interest,1,mean,na.rm=T)
  rsd_healthy <- apply(healthy_of_interest,1,rsd,na.rm=T)
  
  ## Plot
  
  plot(mean_healthy,
       sd_healthy,
       col="blue",
       ylim=c(0,max(c(max(sd_healthy,na.rm=T),max(sd_tumoral,na.rm=T)))+0.05),
       xlab = paste0("mean per probe","\n","(n probes = ",length(sd_healthy),"  window : ","[",window[1],",",window[2],"]"," )"),
       ylab = "sd per probe",
       ...)
  
  points(mean_tumoral,sd_tumoral,col="red")
  
  abline(v = c(mean(mean_healthy,na.rm=T),mean(mean_tumoral,na.rm=T)),col=c("blue","red"),lty = 3)
  abline(h = c(mean(sd_healthy,na.rm=T),mean(sd_tumoral,na.rm=T)),col=c("blue","red"),lty = 3)
  
  arrows(mean_healthy,sd_healthy,mean_tumoral,sd_tumoral,
         length=0.1,
         lwd=0.3)
  
  legend("topright",
         col=c("blue","red"),
         legend=c("mean value healthy tissues","mean value tumoral tissues"),
         lty = 3)
        
  
  ## get a table to return for further investigation 
  
  ret <- cbind(mean_healthy,sd_healthy,rsd_healthy,mean_tumoral,sd_tumoral,rsd_tumoral)
  
  return(ret)
  
  
}
vals_superup<-variability_plot(penda_superup_deregulated,main = "Superup genes probes")

vals_superdown<-variability_plot(penda_superdown_deregulated,main = "Superdown genes probes")

vals_supercons<-variability_plot(penda_superconserved,main = "Supercons genes probes")

3 Variability split plot

Once we visualized variability for each set of genes associated probes, i thought it was interesting to split the plot into two pannel to ensure that both limits of the plot had the same behavior.

variability_split_plot <- function(variability_values = vals,
                                   split_treshold = 0.5,
                                   split_side = 1,
                                   main = "Superup genes"){
  
  if(split_side !=1 & split_side != 2){stop("split_side must be either 1 or 2")}  
  
  if(split_side == 1){
    vals_lower <- variability_values[which(variability_values[,1] <= split_treshold),]
    vals_upper <- variability_values[which(variability_values[,1] > 1-split_treshold),]
  }
  
  if(split_side == 2){
    vals_lower <- variability_values[which(variability_values[,1] < 1-split_treshold),]
    vals_upper <- variability_values[which(variability_values[,1] >= split_treshold),]
  }

  
  par(mfrow=c(1,2))
  
  plot(vals_lower[,1],
       vals_lower[,2],
       col="blue",
       ylim=c(0,max(c(max(variability_values[,2],na.rm=T),max(variability_values[,5],na.rm=T)))),
       xlab = paste0("mean per probe","\n","(n probes = ",length(vals_lower[,2]),")"),
       ylab = "sd per probe")
  
  points(vals_lower[,4],vals_lower[,5],col="red")
  abline(v = c(mean(vals_lower[,1],na.rm=T),mean(vals_lower[,4],na.rm=T)),col=c("blue","red"),lty = 3)
  abline(h = c(mean(vals_lower[,2],na.rm=T),mean(vals_lower[,5],na.rm=T)),col=c("blue","red"),lty = 3)
  
  arrows(vals_lower[,1],vals_lower[,2],vals_lower[,4],vals_lower[,5],
         length=0.1,
         lwd=0.3)
  
  
  
  plot(vals_upper[,1],
       vals_upper[,2],
       col="blue",
       ylim=c(0,max(c(max(variability_values[,2],na.rm=T),max(variability_values[,5],na.rm=T)))),
       xlab = paste0("mean per probe","\n","(n probes = ",length(vals_upper[,2]),")"),
       ylab = "sd per probe")
  
  points(vals_upper[,4],vals_upper[,5],col="red")
  
  abline(v = c(mean(vals_upper[,1],na.rm=T),mean(vals_upper[,4],na.rm=T)),col=c("blue","red"),lty = 3)
  abline(h = c(mean(vals_upper[,2],na.rm=T),mean(vals_upper[,5],na.rm=T)),col=c("blue","red"),lty = 3)
  
  arrows(vals_upper[,1],vals_upper[,2],vals_upper[,4],vals_upper[,5],
         length=0.1,
         lwd=0.3)
  
  mtext(main, line=-2, side=3, outer=TRUE, cex=2)
  
}
variability_split_plot(vals_superup,main = "Superup genes probes")

variability_split_plot(vals_superdown,main = "Superdown genes probes")

variability_split_plot(vals_supercons,main = "Supercons genes probes")

4 Using a larger width to index probes

We see a difference of behavior between methylated superup probes and unmethylated superup probes. But since there is less probes, it could be not significant. Trying to capture more probes can be interesting to get better asymptotic properties.

vals_superup<-variability_plot(penda_superup_deregulated,main = "Superup genes probes",window = c(5000,5000))

vals_superdown<-variability_plot(penda_superdown_deregulated,main = "Superdown genes probes",window = c(5000,5000))

vals_supercons<-variability_plot(penda_superconserved,main = "Supercons genes probes", window = c(5000,5000))

variability_split_plot(vals_superup,main = "Superup genes probes")

variability_split_plot(vals_superdown,main = "Superdown genes probes")

variability_split_plot(vals_supercons,main = "Supercons genes probes")